#REPLICATION MATERIALS FOR: 
#The Crucial Role of Race in 21st Century U.S. Political Realignment
#AUTHORS: MICHAEL BARBER AND JEREMY C. POPE
#PUBLIC OPINION QUARTERLY
#FOR QUESTIONS, CONTACT MBARBER@BYU.EDU


library(readstata13)

# Load the ANES cumulative file and build the analysis variables used below.
anes <- read.dta13("anes_timeseries_cdf_stata_20211118.dta")

# Vote indicators: 1 when the source code is 1, 0 when it is 2, NA otherwise.
pres.code <- as.numeric(anes$VCF0704)
anes$dem_pres <- ifelse(pres.code == 1, 1, ifelse(pres.code == 2, 0, NA))
table(anes$dem_pres)

house.code <- as.numeric(anes$VCF0707)
anes$dem_house <- ifelse(house.code == 1, 1, ifelse(house.code == 2, 0, NA))
table(anes$dem_house)

# Keep only the eleven four-year steps from 1980 through 2020.
anes$year <- anes$VCF0004
anes <- anes[anes$year %in% seq(1980, 2020, 4), ]

# Race/ethnicity dummies from VCF0105a (codes 1, 2 and 5); NA stays NA.
race.code <- as.numeric(anes$VCF0105a)
anes$white <- as.numeric(race.code == 1)
anes$black <- as.numeric(race.code == 2)
anes$hispanic <- as.numeric(race.code == 5)

#education level
# Raw education column, kept as loaded; the collapsed scale is built below.
anes$educ <- anes$VCF0140a
table(anes$educ)

# Income as a numeric code, with 0 treated as missing.
income.code <- as.numeric(anes$VCF0114)
income.code[income.code == 0] <- NA
anes$income <- income.code

# Collapse the seven education codes to a five-point scale:
#   1,2 -> 1   3 -> 2   4,5 -> 3   6 -> 4   7 -> 5
# Anything left above 5 afterwards (i.e. raw codes above 7) becomes NA; values
# outside 1..7 that are not above 5 are passed through unchanged.
educ.raw <- as.numeric(anes$VCF0140a)
collapsed.level <- c(1, 1, 2, 3, 3, 4, 5)
slot <- match(educ.raw, 1:7)
educ.level <- ifelse(is.na(slot), educ.raw, collapsed.level[slot])
educ.level[educ.level > 5] <- NA
anes$education <- educ.level

# Year-by-year correlation between income and the collapsed education scale,
# for whites, Blacks, Hispanics and the full sample.
#
# Years are visited in ascending order so that element k of each result lines
# up with element k of seq(1980, 2020, 4), the x-axis Figure 1 plots these
# vectors against. Walking unique(anes$year) in row order gives that alignment
# only when the rows happen to be sorted chronologically.
cor.years <- sort(unique(anes$year))

# Correlation of anes$income with anes$education within each year.
#
# in.group: logical vector (one entry per row of anes) selecting the
#           respondents to use; pass TRUE to use every row.
# Returns a numeric vector with one correlation per element of cor.years.
# Rows with a missing value in either variable are dropped by cor() via
# use = "complete.obs".
income.educ.cor <- function(in.group) {
  vapply(seq_along(cor.years), function(k) {
    rows <- in.group & anes$year == cor.years[k]
    cor(anes$income[rows], anes$education[rows], use = "complete.obs")
  }, numeric(1))
}

w.cor <- income.educ.cor(anes$white == 1)


# Row counts per year among respondents flagged Black
tapply(anes$income[anes$black == 1], anes$year[anes$black == 1], length)
b.cor <- income.educ.cor(anes$black == 1)


# Row counts per year among respondents flagged Hispanic
tapply(anes$income[anes$hispanic == 1], anes$year[anes$hispanic == 1], length)
h.cor <- income.educ.cor(anes$hispanic == 1)


# Full sample: no group restriction
all.cor <- income.educ.cor(TRUE)


#FIGURE 1
# Income-education correlation by year: whites and the combined sample over
# the whole period, Blacks and Hispanics only for positions 8-11 (2008-2020).
years <- seq(1980, 2020, 4)
recent <- 8:11
last <- 11

plot(
  years, w.cor,
  type = "l",
  xlim = c(1980, 2024), ylim = c(.10, .6),
  axes = FALSE,
  ylab = "Correlation", xlab = "Year",
  main = "Correlation between Income and Education"
)
axis(side = 1, at = years, cex = .7)
axis(side = 2, at = seq(.1, .8, .1), las = 2)

# White series (solid line drawn by plot(); add the markers)
points(years, w.cor, pch = 16)

# Black series
lines(years[recent], b.cor[recent], lty = 2)
points(years[recent], b.cor[recent], pch = 15)

# Hispanic series
lines(years[recent], h.cor[recent], lty = 3)
points(years[recent], h.cor[recent], pch = 17)

# Combined sample
lines(years, all.cor, lty = 4)
points(years, all.cor, pch = 18)

# Series labels to the right of the 2020 point; the White and Combined labels
# are nudged apart by .01 so they do not overlap.
text(2020, w.cor[last] - .01, "White", pos = 4, cex = .7)
text(2020, b.cor[last], "Black", pos = 4, cex = .7)
text(2020, h.cor[last], "Hispanic", pos = 4, cex = .7)
text(2020, all.cor[last] + .01, "Combined", pos = 4, cex = .7)
box()

#####################################################################

#Figure 2a
# High/low group flags (1/0) used by the Figure 2a and 2b vote-share tables.
# Respondents in neither group, or with a missing value, get 0 on both flags.

# Income codes 4-5 are "high", 1-2 are "low" (anes$income is already numeric).
anes$high.income <- ifelse(anes$income %in% c(4, 5), 1, 0)
anes$low.income <- ifelse(anes$income %in% c(1, 2), 1, 0)

# Education codes 6-7 are "high", 1-3 are "low". anes$educ is the raw column
# as loaded, so it is converted with as.numeric() exactly as anes$education
# is: %in% compares a factor by its labels, which would never equal 6 or 7
# and would silently leave both flags at 0 for everyone.
educ.code <- as.numeric(anes$educ)
anes$high.educ <- ifelse(educ.code %in% c(6, 7), 1, 0)
anes$low.educ <- ifelse(educ.code %in% c(1, 2, 3), 1, 0)

# Figures 2a and 2b: among white respondents, the gap in percentage points
# between the weighted vote share of the "high" group and of the "low" group,
# by income and by education, for every election year in `year`.
#
# NOTE(review): wtd.table() is not defined in this file and the only package
# attached here is readstata13. A package providing wtd.table(x =, weights =)
# that returns a table (presumably questionr) must be attached before this
# section runs -- confirm which one was used.
year <- seq(1980, 2020, 4)

# Weighted distribution of a vote indicator among whites in one group.
#
# vote:     0/1 vote indicator, one entry per row of anes
# in.group: 0/1 group flag, one entry per row of anes
# yrs:      election years to tabulate
# Returns a matrix with one row per year; the columns are the weighted
# proportions of each value of `vote` (column 2 is the share with vote == 1
# when both values occur). Weights are anes$VCF0009z.
white.vote.share <- function(vote, in.group, yrs = year) {
  per.year <- lapply(yrs, function(yr) {
    keep <- anes$year == yr & anes$white == 1 & in.group == 1
    wtd.tbl <- wtd.table(x = vote[keep], weights = anes$VCF0009z[keep])
    wtd.tbl / sum(wtd.tbl)
  })
  do.call(rbind, per.year)
}

# Draws one high-minus-low figure: income gap in green, education gap in
# orange, a dashed zero line, and a label next to each series' last point.
draw.white.gap.figure <- function(income.gap, educ.gap, main.title) {
  plot(year, income.gap, pch = 16, ylim = c(-30, 30), axes = FALSE,
       xlim = c(1980, 2028), xlab = "", ylab = "Democratic Vote Margin",
       main = main.title)
  axis(side = 1, at = seq(1980, 2020, 4))
  axis(side = 2, at = seq(-30, 30, 10), las = 2)
  lines(year, income.gap, col = "dark green", lwd = 3)
  points(year, income.gap, pch = 16)
  abline(h = 0, lty = 2)
  lines(year, educ.gap, col = "dark orange", lwd = 3)
  points(year, educ.gap, pch = 15)
  box()
  text(2021, income.gap[11], "Income", pos = 4)
  text(2021, educ.gap[11], "Education", pos = 4)
  invisible(NULL)
}

###########################
#FIGURE 2a: presidential vote (dem_pres)

#income first
high.income.white.vote <- white.vote.share(anes$dem_pres, anes$high.income)
low.income.white.vote <- white.vote.share(anes$dem_pres, anes$low.income)
income.high.minus.low.anes <- (high.income.white.vote[, 2] - low.income.white.vote[, 2]) * 100

#now education
# Uses the same `year` vector as the income tables (the original indexed a
# separate `years` vector left over from the Figure 1 section).
high.educ.white.vote <- white.vote.share(anes$dem_pres, anes$high.educ)
low.educ.white.vote <- white.vote.share(anes$dem_pres, anes$low.educ)
educ.high.minus.low.anes <- (high.educ.white.vote[, 2] - low.educ.white.vote[, 2]) * 100

#plot 1980-2020
draw.white.gap.figure(
  income.high.minus.low.anes, educ.high.minus.low.anes,
  "Difference in Vote Share Between 'high' and 'low' Groups\nPresidential Vote among Whites"
)


###########################

#FIGURE 2b: House vote (dem_house)
year <- seq(1980, 2020, 4)

#income first
high.income.white.vote.house <- white.vote.share(anes$dem_house, anes$high.income)
low.income.white.vote.house <- white.vote.share(anes$dem_house, anes$low.income)
income.high.minus.low.anes.house <- (high.income.white.vote.house[, 2] - low.income.white.vote.house[, 2]) * 100

#now education
high.educ.white.vote.house <- white.vote.share(anes$dem_house, anes$high.educ)
low.educ.white.vote.house <- white.vote.share(anes$dem_house, anes$low.educ)
educ.high.minus.low.anes.house <- (high.educ.white.vote.house[, 2] - low.educ.white.vote.house[, 2]) * 100

#plot 1980-2020
draw.white.gap.figure(
  income.high.minus.low.anes.house, educ.high.minus.low.anes.house,
  "Difference in Vote Share Between 'high' and 'low' Groups\nCongressional Vote among Whites"
)



#####################################################################
#####################################################################

#CCES
# Load the cumulative CCES file, leaving labelled variables as numeric codes.
cces.cumulative <- read.dta13("CCES_cumulative_2006-2020.dta", convert.factors = FALSE)

# Keep the four presidential-election waves.
cces.cumulative.p <- cces.cumulative[cces.cumulative$year %in% c(2008, 2012, 2016, 2020), ]

# Party codes 3 and 4 are set to missing, so only codes 1 and 2 feed the
# indicators below.
pres.party <- cces.cumulative.p$voted_pres_party
pres.party[pres.party %in% c(3, 4)] <- NA
cces.cumulative.p$voted_pres_party <- pres.party

# 1/0 indicators for party code 1 (dem) and party code 2 (rep); NA stays NA.
cces.cumulative.p$voted_pres_dem <- ifelse(pres.party == 1, 1, 0)
cces.cumulative.p$voted_pres_rep <- ifelse(pres.party == 2, 1, 0)


# Figures 3a and 3b: high-minus-low gaps in weighted presidential vote share
# among Black (race == 2) and Latino (race == 3) CCES respondents, by family
# income and by education, for 2008-2020.

# Weighted share of a 0/1 outcome in each year, within a subset of rows.
#
# outcome: name of a 0/1 column of cces.cumulative.p (character scalar)
# keep:    logical row selector, one entry per row of cces.cumulative.p
# Returns the coefficients of a no-intercept regression of the outcome on
# year dummies, weighted by weight_cumulative: one weighted mean per year.
# The same selector is applied to the rows and to the weights, so the two can
# never drift apart.
cces.vote.share <- function(outcome, keep) {
  rows <- cces.cumulative.p[keep, ]
  obs.weight <- cces.cumulative.p$weight_cumulative[keep]
  fit <- lm(as.formula(paste(outcome, "~ -1 + as.factor(year)")),
            data = rows, weights = obs.weight)
  fit$coefficients
}

# Row selectors for the four comparison groups within one race code.
#   low income:  faminc < 4          high income: faminc > 8, excluding 13
#   low educ:    educ < 3            high educ:   educ > 4
cces.group.masks <- function(race.code) {
  d <- cces.cumulative.p
  in.race <- d$race == race.code
  list(
    low.income = d$faminc < 4 & in.race,
    high.income = d$faminc > 8 & d$faminc != 13 & in.race,
    low.educ = d$educ < 3 & in.race,
    high.educ = d$educ > 4 & in.race
  )
}

# Draws one high-minus-low figure: income gap in green, education gap in
# orange, a dashed zero line, and a label next to each series' last point.
draw.cces.gap.figure <- function(income.gap, educ.gap, main.title) {
  plot(year, income.gap, pch = 16, ylim = c(-30, 30), axes = FALSE,
       xlim = c(2008, 2025), xlab = "", ylab = "Democratic Vote Margin",
       main = main.title)
  axis(side = 1, at = seq(2008, 2020, 4))
  axis(side = 2, at = seq(-30, 30, 10), las = 2)
  lines(year, income.gap, col = "dark green", lwd = 3)
  points(year, income.gap, pch = 16)
  abline(h = 0, lty = 2)
  lines(year, educ.gap, col = "dark orange", lwd = 3)
  points(year, educ.gap, pch = 15)
  box()
  text(2021, income.gap[4], "Income", pos = 4)
  text(2021, educ.gap[4], "Education", pos = 4)
  invisible(NULL)
}

year <- c(2008, 2012, 2016, 2020)


#########################################################
#Blacks: Democratic vote share by income and by education
grp <- cces.group.masks(2)

low.income.dem.vote <- cces.vote.share("voted_pres_dem", grp$low.income)
high.income.dem.vote <- cces.vote.share("voted_pres_dem", grp$high.income)
low.educ.dem.vote <- cces.vote.share("voted_pres_dem", grp$low.educ)
high.educ.dem.vote <- cces.vote.share("voted_pres_dem", grp$high.educ)

income.high.minus.low <- (high.income.dem.vote - low.income.dem.vote) * 100
educ.high.minus.low <- (high.educ.dem.vote - low.educ.dem.vote) * 100

#Blacks: Republican vote share by income and by education
# (the .r gaps are computed for reference; the figures below plot only the
# Democratic gaps)
low.income.rep.vote <- cces.vote.share("voted_pres_rep", grp$low.income)
high.income.rep.vote <- cces.vote.share("voted_pres_rep", grp$high.income)
low.educ.rep.vote <- cces.vote.share("voted_pres_rep", grp$low.educ)
high.educ.rep.vote <- cces.vote.share("voted_pres_rep", grp$high.educ)

income.high.minus.low.r <- (high.income.rep.vote - low.income.rep.vote) * 100
educ.high.minus.low.r <- (high.educ.rep.vote - low.educ.rep.vote) * 100

#Figure 3a
#MARGINS BETWEEN HIGH AND LOW GROUPS - BLACKS IN CCES
draw.cces.gap.figure(
  income.high.minus.low, educ.high.minus.low,
  "Difference in Vote Share Between 'high' and 'low' Groups\nPresidential Vote among Blacks (CCES Survey)"
)


#########################################################
#Latinos: Democratic vote share by income and by education
# The result names are reused, so the Black estimates above are overwritten.
grp <- cces.group.masks(3)

low.income.dem.vote <- cces.vote.share("voted_pres_dem", grp$low.income)
high.income.dem.vote <- cces.vote.share("voted_pres_dem", grp$high.income)
low.educ.dem.vote <- cces.vote.share("voted_pres_dem", grp$low.educ)
high.educ.dem.vote <- cces.vote.share("voted_pres_dem", grp$high.educ)

income.high.minus.low <- (high.income.dem.vote - low.income.dem.vote) * 100
educ.high.minus.low <- (high.educ.dem.vote - low.educ.dem.vote) * 100

#Latinos: Republican vote share by income and by education
low.income.rep.vote <- cces.vote.share("voted_pres_rep", grp$low.income)
high.income.rep.vote <- cces.vote.share("voted_pres_rep", grp$high.income)
low.educ.rep.vote <- cces.vote.share("voted_pres_rep", grp$low.educ)
high.educ.rep.vote <- cces.vote.share("voted_pres_rep", grp$high.educ)

income.high.minus.low.r <- (high.income.rep.vote - low.income.rep.vote) * 100
educ.high.minus.low.r <- (high.educ.rep.vote - low.educ.rep.vote) * 100

#Figure 3b
#MARGINS BETWEEN HIGH AND LOW GROUPS - LATINOS IN CCES
draw.cces.gap.figure(
  income.high.minus.low, educ.high.minus.low,
  "Difference in Vote Share Between 'high' and 'low' Groups\nPresidential Vote among Latinos (CCES Survey)"
)





